home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Personal Computer World 2005 October
/
PCWOCT05.iso
/
Software
/
FromTheMag
/
XAMPP 1.4.14
/
xampp-win32-1.4.14-installer.exe
/
xampp
/
php
/
pear
/
Text
/
Statistics.php
< prev
next >
Wrap
PHP Script
|
2004-03-24
|
5KB
|
168 lines
<?php
/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
// +----------------------------------------------------------------------+
// | PHP version 4                                                        |
// +----------------------------------------------------------------------+
// | Copyright (c) 1997-2002 The PHP Group                                |
// +----------------------------------------------------------------------+
// | This source file is subject to version 2.0 of the PHP license,       |
// | that is bundled with this package in the file LICENSE, and is        |
// | available through the world-wide-web at                              |
// | http://www.php.net/license/2_02.txt.                                 |
// | If you did not receive a copy of the PHP license and are unable to   |
// | obtain it through the world-wide-web, please send a note to          |
// | license@php.net so we can mail you a copy immediately.               |
// +----------------------------------------------------------------------+
// | Author: George Schlossnagle <george@omniti.com> |
// +----------------------------------------------------------------------+
//
// $Id$
/*
* Text_Statistics calculates some basic readability metrics on a
* block of text. The number of words, the number of sentences,
* and the total number of syllables are counted. These statistics
* are used to calculate the Flesch score for the text, which
* is a number (usually between 0 and 100) that represents the
* readability of the text. A basic breakdown of scores is:
*
* 90 to 100 5th grade
* 80 to 90 6th grade
* 70 to 80 7th grade
* 60 to 70 8th and 9th grade
* 50 to 60 10th to 12th grade (high school)
* 30 to 50 college
* 0 to 30 college graduate
*
* More information is available at
* http://www.mang.canterbury.ac.nz/courseinfo/AcademicWriting/Flesch.htm
*
* require 'Text/Statistics.php';
* $block = new Text_Statistics($sometext);
* $block->flesch; // holds the Flesch score for $sometext
*
* see the unit tests for additional examples.
*
* @package Text_Statistics
* @author George Schlossnagle <george@omniti.com>
*/
require_once "Text/Word.php";
/**
 * Computes basic readability statistics (word, unique-word, sentence and
 * syllable counts) for a block of text and derives its Flesch score.
 *
 * All results are exposed as public properties that are filled in by the
 * constructor.
 */
class Text_Statistics {
    /**
     * The document text.
     *
     * @var string
     * @access public
     */
    var $text = '';

    /**
     * The number of syllables in the document.
     *
     * @var number
     * @access public
     */
    var $numSyllables = 0;

    /**
     * The number of words in the document.
     *
     * @var number
     * @access public
     */
    var $numWords = 0;

    /**
     * The number of unique words in the document (case-insensitive).
     *
     * @var number
     * @access public
     */
    var $uniqWords = 0;

    /**
     * The number of sentence terminators (., ! or ?) found in the document.
     *
     * @var number
     * @access public
     */
    var $numSentences = 0;

    /**
     * The Flesch score of the document. Stays 0 when the document
     * contains no words, because the score is undefined in that case.
     *
     * @var number
     * @access public
     */
    var $flesch = 0;

    /**
     * Occurrence count of every word seen so far, keyed by the
     * lower-cased word. Used to derive $uniqWords.
     *
     * @var array
     * @access protected
     */
    var $_uniques = array();

    /**
     * Some abbreviations we should expand before counting, as a map of
     * PCRE pattern => replacement. Expanding them also removes their
     * trailing period, so it is not counted as a sentence terminator.
     * This list could/should be much larger.
     *
     * NOTE(review): 'Misterr' looks like a typo for 'Mister', but it may be
     * a deliberate phonetic spelling for the syllable counter - confirm
     * against Text_Word before changing it. The 'Mr.' pattern is also the
     * only one without the /i flag.
     *
     * @var array
     * @access protected
     */
    var $_abbreviations = array('/Mr\./'   => 'Misterr',
                                '/Mrs\./i' => 'Misses', // Phonetic
                                '/etc\./i' => 'etcetera',
                                '/Dr\./i'  => 'Doctor',
                               );

    /**
     * Constructor. Stores the text and analyzes it immediately.
     *
     * @param string $block the text to analyze
     * @access public
     */
    function __construct($block)
    {
        $this->text = $block;
        $this->_analyze();
    }

    /**
     * PHP 4 style constructor, kept for backward compatibility.
     *
     * PHP 8 no longer treats a method named after the class as a
     * constructor, so the real work lives in __construct() and this
     * method only delegates to it.
     *
     * @param string $block the text to analyze
     * @access public
     */
    function Text_Statistics($block)
    {
        $this->__construct($block);
    }

    /**
     * Compute statistics for the document object.
     *
     * Resets all counters, analyzes $this->text line by line and then
     * derives the Flesch score:
     *
     *   206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)
     *
     * @access protected
     */
    function _analyze()
    {
        // Start from a clean slate so that a repeated analysis does not
        // accumulate onto the results of a previous one.
        $this->numSyllables = 0;
        $this->numWords     = 0;
        $this->uniqWords    = 0;
        $this->numSentences = 0;
        $this->flesch       = 0;
        $this->_uniques     = array();

        $lines = explode("\n", $this->text);
        foreach ($lines as $line) {
            $this->_analyze_line($line);
        }

        // Without words the score is undefined (division by zero);
        // leave it at 0.
        if ($this->numWords == 0) {
            return;
        }

        // Text without any terminator is scored as a single sentence
        // instead of dividing by zero; $numSentences keeps the raw count.
        $sentences = max(1, $this->numSentences);

        $this->flesch = 206.835 -
                        (1.015 * ($this->numWords / $sentences)) -
                        (84.6 * ($this->numSyllables / $this->numWords));
    }

    /**
     * Helper function, computes statistics on a given line and adds them
     * to the running totals of the document.
     *
     * @param string $line a single line of the document
     * @access protected
     */
    function _analyze_line($line)
    {
        // expand abbreviations for counting syllables
        $line = preg_replace(array_keys($this->_abbreviations),
                             array_values($this->_abbreviations),
                             $line);

        // A word starts with a word character and may contain
        // apostrophes and hyphens.
        preg_match_all("/\b(\w[\w'-]*)\b/", $line, $words);
        foreach ($words[1] as $word) {
            $w_obj = new Text_Word($word);
            $this->numSyllables += $w_obj->numSyllables();
            $this->numWords++;

            // Uniqueness is case-insensitive.
            $key = strtolower($word);
            if (isset($this->_uniques[$key])) {
                $this->_uniques[$key]++;
            } else {
                $this->_uniques[$key] = 1;
                $this->uniqWords++;
            }
        }

        // Every '.', '!' or '?' counts as one sentence terminator, so
        // "..." or a decimal point such as in "3.14" is counted as well.
        preg_match_all("/[.!?]/", $line, $matches);
        $this->numSentences += count($matches[0]);
    }
}
?>